import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors
df = pd.read_csv('log.csv', parse_dates=['Joined', 'Left', 'Time'])
df['Time'] = df['Left'] - df['Joined']
df = df[df['Member Name'] != 'Sauron']
df
min_datetime = df['Joined'].min()
max_datetime = df['Left'].max()
min_date, max_date = min_datetime.date(), max_datetime.date()
unique_members = df['Member Name'].unique().tolist()
colormap = plt.cm.tab20b(np.linspace(0, 1, len(unique_members)))
palette = [matplotlib.colors.to_hex(c) for c in colormap]
print(f'Data collected over {max_datetime - min_datetime} with {len(unique_members)} unique members')
# sessions.index = pd.CategoricalIndex(sessions.index, unique_members)
# sessions = sessions.sort_values(ascending=False)
# sessions
# sessions.plot(use_index=True, kind='box', figsize=(20, 5), title='Average Session Length', xlabel='', ylabel='Hours', rot=0, color=colormap)
# plt.show()
plt.figure(figsize=(20, 5))
sns.boxplot(x='Member Name', y=df['Time'].dt.seconds / 3600, data=df, palette=palette)
plt.xlabel('')
plt.ylabel('Hours')
plt.show()
start = min_datetime.replace(minute=0, second=0, microsecond=0)
end = max_datetime.replace(minute=0, second=0, microsecond=0) + pd.offsets.Hour(1)
attendance_data = []
date = start
while date < end:
members = df[(df['Joined'] <= date + pd.offsets.Minute(1)) & (df['Left'] >= date)]['Member Name'].to_list()
attendance_data.append([date] + [member in members for member in unique_members])
date += pd.offsets.Minute(1)
att_df = pd.DataFrame(attendance_data, columns=['Date'] + unique_members)
axes = att_df.set_index('Date').rolling(1).mean().plot(figsize=(14, 24), yticks=[0, 1], subplots=True, sharex=True, kind='area', color=colormap)
for ax in axes:
ax.set_yticklabels(['Disconnected', 'Connected'])
plt.subplots_adjust()
plt.show()
def part_of_day(date):
if 0 <= date.hour < 6:
return '00:00 - 05:59'
elif 6 <= date.hour < 12:
return '06:00 - 11:59'
elif 12 <= date.hour < 18:
return '12:00 - 17:59'
else:
return '18:00 - 23:59'
weekday_order = ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday']
daily_att = att_df.groupby(att_df['Date'].dt.day_name()).sum() / 60
daily_att.index = pd.CategoricalIndex(daily_att.index, weekday_order)
daily_att = daily_att.sort_index()
hourly_att = att_df.groupby(att_df['Date'].apply(part_of_day)).sum() / 60
all_att = att_df.sum() / 60
daily_att.plot(use_index=True, kind='bar', figsize=(20, 5), title='Daily Attendance', xlabel='Weekday', rot=0, color=colormap)
plt.show()
hourly_att.plot(use_index=True, kind='bar', figsize=(20, 5), title='Hourly Attendance', xlabel='Time', rot=0, color=colormap)
plt.show()
all_att.sort_values(ascending=False).plot(use_index=True, kind='bar', figsize=(20, 5), title='Total Attendance', xlabel=None, ylabel='Hours', rot=0, color=colormap)
plt.show()
corr = att_df.corr()
corr
plt.figure(figsize=(20, 20))
sns.heatmap(corr, cmap='YlGnBu')
plt.xticks(rotation=0)
plt.show()
plt.figure(figsize=(20, 20))
sns.heatmap(corr[(abs(corr) > 0.3)], cmap='YlGnBu')
plt.xticks(rotation=0)
plt.show()
pd.DataFrame(corr.unstack().sort_values().drop_duplicates())